Install detectron2¶

In [1]:
# !conda create -n myenv python=3.9 -y
#!source /root/anaconda3/etc/profile.d/conda.sh
#!conda activate myenv
In [2]:
#!python -m pip install pyyaml==5.1
# Detectron2 has not released pre-built binaries for the latest pytorch (https://github.com/facebookresearch/detectron2/issues/4053)
# so we install from source instead. This takes a few minutes.
#!python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

# Install pre-built detectron2 that matches pytorch version, if released:
# See https://detectron2.readthedocs.io/tutorials/install.html for instructions
#!pip install detectron2 -f https://dl.fbaipublicfiles.com/detectron2/wheels/{CUDA_VERSION}/{TORCH_VERSION}/index.html

# exit(0)  # After installation, you may need to "restart runtime" in Colab. This line can also restart runtime
In [3]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2021 NVIDIA Corporation
Built on Thu_Nov_18_09:45:30_PST_2021
Cuda compilation tools, release 11.5, V11.5.119
Build cuda_11.5.r11.5/compiler.30672275_0
torch:  1.9 ; cuda:  cu111
detectron2: 0.6
In [4]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
#from google.colab.patches import cv2_imshow

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog

Run a pre-trained detectron2 model¶

Train on a custom dataset¶

In [5]:
import json
  
# Opening JSON file
f = open('/root/raghav/labelstudio/result.json')
  
dataset_dicts = json.load(f)
In [6]:
dataset_dicts['images'][1]
Out[6]:
{'width': 1535,
 'height': 2730,
 'id': 1,
 'file_name': 'images/11/82c7c2ca-11.jpg'}
In [7]:
dataset_dicts['categories']
Out[7]:
[{'id': 0, 'name': 'Wire'}]
In [8]:
dataset_dicts.keys()
Out[8]:
dict_keys(['images', 'categories', 'annotations', 'info'])
In [9]:
!ls /root/raghav/labelstudio/images/11
03865ad2-25.jpg		765ef5c1-9.jpg	       a525426b-24.jpg
0571f85d-4.jpg		7b2945ef-IMG_0340.JPG  a7bd461c-8.JPG
060aaccc-IMG_0345.JPG	7e632d99-IMG_0306.JPG  a99d9f5b-IMG_0323.JPG
16bee31b-1.jpg		7f355d57-2.JPG	       ab3b01ca-IMG_0352.JPG
1af4a433-IMG_0354.JPG	7fc61b3a-IMG_0318.JPG  abfcc678-IMG_0339.JPG
23c6bf74-5.jpg		82c7c2ca-11.jpg        acef4032-IMG_0342.JPG
2f68429a-IMG_0333.JPG	838b8505-IMG_0330.JPG  b6e1149a-IMG_0329.JPG
30b24faf-21.jpg		83fd0d53-IMG_0304.JPG  bdcfba0e-14.jpg
3576ba5b-20.jpg		859c9222-IMG_0313.JPG  bf411d4c-IMG_0294.JPG
39fa94e9-19.jpg		8b4b117d-IMG_0344.JPG  cd6ea9ce-IMG_0355.JPG
3a9d0723-IMG_0346.JPG	8b92b444-IMG_0322.JPG  ce39d6ee-IMG_0335.JPG
438005ea-IMG_0326.JPG	8cde8e52-3.jpg	       cfb5b88a-IMG_0301.JPG
4946c5fb-IMG_0331.JPG	8ce904be-IMG_0348.JPG  dd70db44-IMG_0343.JPG
4bd013d1-IMG_0303.JPG	8d9688df-17.jpg        de03b894-7.jpg
52df14cb-12.jpg		8f5d6a91-IMG_0332.JPG  dec01bf9-13.jpg
5b1dbf69-IMG_E0297.JPG	9152cc57-IMG_0349.JPG  e0400583-IMG_0347.JPG
5d1b016a-IMG_0336.JPG	9259c363-IMG_0311.JPG  e1a5c9ad-IMG_0312.JPG
5fae8912-IMG_0338.JPG	9505cd1e-IMG_0299.JPG  e3423fba-IMG_0308.JPG
60708a60-IMG_0316.JPG	96c90a35-16.jpg        e4f2229d-IMG_0337.JPG
62c4d761-IMG_0334.JPG	9a34f8a1-IMG_0320.JPG  e73b6357-10.jpg
659fbde8-18.jpg		9f7b7a1f-IMG_0298.JPG  ed89dda7-IMG_0321.JPG
69db667b-15.JPG		a1a4c73b-IMG_0295.JPG  f2724ae4-IMG_0319.JPG
6cf19201-IMG_0309.JPG	a29d1440-23.jpg        f2b94273-IMG_0350.JPG
6d9d2d6f-IMG_0359.JPG	a2f9206c-IMG_0353.JPG  f66ef193-22.jpg
74f14799-IMG_0300.JPG	a3bb42c6-IMG_0302.JPG  fbbba93f-6.JPG
In [10]:
from detectron2.data.datasets import register_coco_instances
register_coco_instances("my_dataset8", {}, "/root/raghav/labelstudio/result.json", "/root/raghav/labelstudio/")
In [11]:
#register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")
In [12]:
# if your dataset is in COCO format, this cell can be replaced by the following three lines:
# from detectron2.data.datasets import register_coco_instances
# register_coco_instances("my_dataset_train", {}, "json_annotation_train.json", "path/to/image/dir")
# register_coco_instances("my_dataset_val", {}, "json_annotation_val.json", "path/to/image/dir")

from detectron2.structures import BoxMode

Train!¶

Now, let's fine-tune a COCO-pretrained R50-FPN Mask R-CNN model on the balloon dataset. It takes ~2 minutes to train 300 iterations on a P100 GPU.

In [14]:
from detectron2.engine import DefaultTrainer

cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset8",)
cfg.DATASETS.TEST = ("my_dataset8",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_101_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 1000    # 300 iterations seems good enough for this toy dataset; you will need to train longer for a practical dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128   # The "RoIHead batch size". 128 is faster, and good enough for this toy dataset (default: 512)
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # only has one class (ballon). (see https://detectron2.readthedocs.io/tutorials/datasets.html#update-the-config-for-new-datasets)
# NOTE: this config means the number of classes, but a few popular unofficial tutorials incorrect uses num_classes+1 here.
In [15]:
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
In [16]:
trainer = DefaultTrainer(cfg) 
trainer.resume_or_load(resume=False)
[01/03 13:32:01 d2.engine.defaults]: Model:
GeneralizedRCNN(
  (backbone): FPN(
    (fpn_lateral2): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(2048, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): ResNet(
      (stem): BasicStem(
        (conv1): Conv2d(
          3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False
          (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
        )
      )
      (res2): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv1): Conv2d(
            64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv2): Conv2d(
            64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=64, eps=1e-05)
          )
          (conv3): Conv2d(
            64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
        )
      )
      (res3): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv1): Conv2d(
            256, 128, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
        (3): BottleneckBlock(
          (conv1): Conv2d(
            512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv2): Conv2d(
            128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=128, eps=1e-05)
          )
          (conv3): Conv2d(
            128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
        )
      )
      (res4): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
          (conv1): Conv2d(
            512, 256, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (3): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (4): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (5): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (6): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (7): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (8): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (9): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (10): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (11): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (12): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (13): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (14): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (15): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (16): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (17): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (18): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (19): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (20): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (21): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
        (22): BottleneckBlock(
          (conv1): Conv2d(
            1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv2): Conv2d(
            256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=256, eps=1e-05)
          )
          (conv3): Conv2d(
            256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=1024, eps=1e-05)
          )
        )
      )
      (res5): Sequential(
        (0): BottleneckBlock(
          (shortcut): Conv2d(
            1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
          (conv1): Conv2d(
            1024, 512, kernel_size=(1, 1), stride=(2, 2), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv2): Conv2d(
            512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv3): Conv2d(
            512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
        )
        (1): BottleneckBlock(
          (conv1): Conv2d(
            2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv2): Conv2d(
            512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv3): Conv2d(
            512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
        )
        (2): BottleneckBlock(
          (conv1): Conv2d(
            2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv2): Conv2d(
            512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=512, eps=1e-05)
          )
          (conv3): Conv2d(
            512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False
            (norm): FrozenBatchNorm2d(num_features=2048, eps=1e-05)
          )
        )
      )
    )
  )
  (proposal_generator): RPN(
    (rpn_head): StandardRPNHead(
      (conv): Conv2d(
        256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation): ReLU()
      )
      (objectness_logits): Conv2d(256, 3, kernel_size=(1, 1), stride=(1, 1))
      (anchor_deltas): Conv2d(256, 12, kernel_size=(1, 1), stride=(1, 1))
    )
    (anchor_generator): DefaultAnchorGenerator(
      (cell_anchors): BufferList()
    )
  )
  (roi_heads): StandardROIHeads(
    (box_pooler): ROIPooler(
      (level_poolers): ModuleList(
        (0): ROIAlign(output_size=(7, 7), spatial_scale=0.25, sampling_ratio=0, aligned=True)
        (1): ROIAlign(output_size=(7, 7), spatial_scale=0.125, sampling_ratio=0, aligned=True)
        (2): ROIAlign(output_size=(7, 7), spatial_scale=0.0625, sampling_ratio=0, aligned=True)
        (3): ROIAlign(output_size=(7, 7), spatial_scale=0.03125, sampling_ratio=0, aligned=True)
      )
    )
    (box_head): FastRCNNConvFCHead(
      (flatten): Flatten(start_dim=1, end_dim=-1)
      (fc1): Linear(in_features=12544, out_features=1024, bias=True)
      (fc_relu1): ReLU()
      (fc2): Linear(in_features=1024, out_features=1024, bias=True)
      (fc_relu2): ReLU()
    )
    (box_predictor): FastRCNNOutputLayers(
      (cls_score): Linear(in_features=1024, out_features=2, bias=True)
      (bbox_pred): Linear(in_features=1024, out_features=4, bias=True)
    )
    (mask_pooler): ROIPooler(
      (level_poolers): ModuleList(
        (0): ROIAlign(output_size=(14, 14), spatial_scale=0.25, sampling_ratio=0, aligned=True)
        (1): ROIAlign(output_size=(14, 14), spatial_scale=0.125, sampling_ratio=0, aligned=True)
        (2): ROIAlign(output_size=(14, 14), spatial_scale=0.0625, sampling_ratio=0, aligned=True)
        (3): ROIAlign(output_size=(14, 14), spatial_scale=0.03125, sampling_ratio=0, aligned=True)
      )
    )
    (mask_head): MaskRCNNConvUpsampleHead(
      (mask_fcn1): Conv2d(
        256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation): ReLU()
      )
      (mask_fcn2): Conv2d(
        256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation): ReLU()
      )
      (mask_fcn3): Conv2d(
        256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation): ReLU()
      )
      (mask_fcn4): Conv2d(
        256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)
        (activation): ReLU()
      )
      (deconv): ConvTranspose2d(256, 256, kernel_size=(2, 2), stride=(2, 2))
      (deconv_relu): ReLU()
      (predictor): Conv2d(256, 1, kernel_size=(1, 1), stride=(1, 1))
    )
  )
)
WARNING [01/03 13:32:01 d2.data.datasets.coco]: 
Category ids in annotations are not in [1, #categories]! We'll apply a mapping for you.

[01/03 13:32:01 d2.data.datasets.coco]: Loaded 75 images in COCO format from /root/raghav/labelstudio/result.json
[01/03 13:32:01 d2.data.build]: Removed 8 images with no usable annotations. 67 images left.
[01/03 13:32:01 d2.data.build]: Distribution of instances among all 1 categories:
|  category  | #instances   |
|:----------:|:-------------|
|    Wire    | 107          |
|            |              |
[01/03 13:32:01 d2.data.dataset_mapper]: [DatasetMapper] Augmentations used in training: [ResizeShortestEdge(short_edge_length=(640, 672, 704, 736, 768, 800), max_size=1333, sample_style='choice'), RandomFlip()]
[01/03 13:32:01 d2.data.build]: Using training sampler TrainingSampler
[01/03 13:32:01 d2.data.common]: Serializing 67 elements to byte tensors and concatenating them all ...
[01/03 13:32:01 d2.data.common]: Serialized dataset takes 0.05 MiB
2023-01-03 13:32:01.913345: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-01-03 13:32:03.289056: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-01-03 13:32:05.476628: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2023-01-03 13:32:05.476749: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /root/anaconda3/lib/python3.9/site-packages/cv2/../../lib64:
2023-01-03 13:32:05.476762: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
/root/anaconda3/lib/python3.9/site-packages/scipy/__init__.py:146: UserWarning: A NumPy version >=1.16.5 and <1.23.0 is required for this version of SciPy (detected version 1.23.0
  warnings.warn(f"A NumPy version >={np_minversion} and <{np_maxversion}"
Skip loading parameter 'roi_heads.box_predictor.cls_score.weight' to the model due to incompatible shapes: (81, 1024) in the checkpoint but (2, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.cls_score.bias' to the model due to incompatible shapes: (81,) in the checkpoint but (2,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.weight' to the model due to incompatible shapes: (320, 1024) in the checkpoint but (4, 1024) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.box_predictor.bbox_pred.bias' to the model due to incompatible shapes: (320,) in the checkpoint but (4,) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.weight' to the model due to incompatible shapes: (80, 256, 1, 1) in the checkpoint but (1, 256, 1, 1) in the model! You might want to double check if this is expected.
Skip loading parameter 'roi_heads.mask_head.predictor.bias' to the model due to incompatible shapes: (80,) in the checkpoint but (1,) in the model! You might want to double check if this is expected.
Some model parameters or buffers are not found in the checkpoint:
roi_heads.box_predictor.bbox_pred.{bias, weight}
roi_heads.box_predictor.cls_score.{bias, weight}
roi_heads.mask_head.predictor.{bias, weight}
In [17]:
#trainer.train()
In [18]:
# Look at training curves in tensorboard:
#%load_ext tensorboard
#%tensorboard --logdir output

Inference & evaluation using the trained model¶

Now, let's run inference with the trained model on the balloon validation dataset. First, let's create a predictor using the model we just trained:

In [24]:
cfg.OUTPUT_DIR
Out[24]:
'./output'
In [25]:
from detectron2.utils.visualizer import ColorMode
import matplotlib.pyplot as plt
from skimage.io import imshow, imread
from skimage.color import rgb2hsv, hsv2rgb
In [26]:
k=dataset_dicts['images'][5]['file_name']
In [27]:
im = cv2.imread("/root/raghav/labelstudio/"+k,1)
In [28]:
imshow(im)
Out[28]:
<matplotlib.image.AxesImage at 0x7f19dd4e24f0>
In [29]:
 #im1=imshow("/root/raghav/labelstudio/wires/images (7).jpeg")
In [30]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)
[01/03 13:33:19 d2.checkpoint.c2_model_loading]: Following weights matched with model:
| Names in Model                                  | Names in Checkpoint                                                                                  | Shapes                                          |
|:------------------------------------------------|:-----------------------------------------------------------------------------------------------------|:------------------------------------------------|
| backbone.bottom_up.res2.0.conv1.*               | backbone.bottom_up.res2.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,64,1,1)             |
| backbone.bottom_up.res2.0.conv2.*               | backbone.bottom_up.res2.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,64,3,3)             |
| backbone.bottom_up.res2.0.conv3.*               | backbone.bottom_up.res2.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| backbone.bottom_up.res2.0.shortcut.*            | backbone.bottom_up.res2.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| backbone.bottom_up.res2.1.conv1.*               | backbone.bottom_up.res2.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,256,1,1)            |
| backbone.bottom_up.res2.1.conv2.*               | backbone.bottom_up.res2.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,64,3,3)             |
| backbone.bottom_up.res2.1.conv3.*               | backbone.bottom_up.res2.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| backbone.bottom_up.res2.2.conv1.*               | backbone.bottom_up.res2.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,256,1,1)            |
| backbone.bottom_up.res2.2.conv2.*               | backbone.bottom_up.res2.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (64,) (64,) (64,) (64,) (64,64,3,3)             |
| backbone.bottom_up.res2.2.conv3.*               | backbone.bottom_up.res2.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,64,1,1)        |
| backbone.bottom_up.res3.0.conv1.*               | backbone.bottom_up.res3.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,256,1,1)       |
| backbone.bottom_up.res3.0.conv2.*               | backbone.bottom_up.res3.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,128,3,3)       |
| backbone.bottom_up.res3.0.conv3.*               | backbone.bottom_up.res3.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,128,1,1)       |
| backbone.bottom_up.res3.0.shortcut.*            | backbone.bottom_up.res3.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (512,) (512,) (512,) (512,) (512,256,1,1)       |
| backbone.bottom_up.res3.1.conv1.*               | backbone.bottom_up.res3.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,512,1,1)       |
| backbone.bottom_up.res3.1.conv2.*               | backbone.bottom_up.res3.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,128,3,3)       |
| backbone.bottom_up.res3.1.conv3.*               | backbone.bottom_up.res3.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,128,1,1)       |
| backbone.bottom_up.res3.2.conv1.*               | backbone.bottom_up.res3.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,512,1,1)       |
| backbone.bottom_up.res3.2.conv2.*               | backbone.bottom_up.res3.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,128,3,3)       |
| backbone.bottom_up.res3.2.conv3.*               | backbone.bottom_up.res3.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,128,1,1)       |
| backbone.bottom_up.res3.3.conv1.*               | backbone.bottom_up.res3.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,512,1,1)       |
| backbone.bottom_up.res3.3.conv2.*               | backbone.bottom_up.res3.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (128,) (128,) (128,) (128,) (128,128,3,3)       |
| backbone.bottom_up.res3.3.conv3.*               | backbone.bottom_up.res3.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,128,1,1)       |
| backbone.bottom_up.res4.0.conv1.*               | backbone.bottom_up.res4.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,512,1,1)       |
| backbone.bottom_up.res4.0.conv2.*               | backbone.bottom_up.res4.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.0.conv3.*               | backbone.bottom_up.res4.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.0.shortcut.*            | backbone.bottom_up.res4.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (1024,) (1024,) (1024,) (1024,) (1024,512,1,1)  |
| backbone.bottom_up.res4.1.conv1.*               | backbone.bottom_up.res4.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.1.conv2.*               | backbone.bottom_up.res4.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.1.conv3.*               | backbone.bottom_up.res4.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.10.conv1.*              | backbone.bottom_up.res4.10.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.10.conv2.*              | backbone.bottom_up.res4.10.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.10.conv3.*              | backbone.bottom_up.res4.10.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.11.conv1.*              | backbone.bottom_up.res4.11.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.11.conv2.*              | backbone.bottom_up.res4.11.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.11.conv3.*              | backbone.bottom_up.res4.11.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.12.conv1.*              | backbone.bottom_up.res4.12.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.12.conv2.*              | backbone.bottom_up.res4.12.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.12.conv3.*              | backbone.bottom_up.res4.12.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.13.conv1.*              | backbone.bottom_up.res4.13.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.13.conv2.*              | backbone.bottom_up.res4.13.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.13.conv3.*              | backbone.bottom_up.res4.13.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.14.conv1.*              | backbone.bottom_up.res4.14.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.14.conv2.*              | backbone.bottom_up.res4.14.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.14.conv3.*              | backbone.bottom_up.res4.14.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.15.conv1.*              | backbone.bottom_up.res4.15.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.15.conv2.*              | backbone.bottom_up.res4.15.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.15.conv3.*              | backbone.bottom_up.res4.15.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.16.conv1.*              | backbone.bottom_up.res4.16.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.16.conv2.*              | backbone.bottom_up.res4.16.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.16.conv3.*              | backbone.bottom_up.res4.16.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.17.conv1.*              | backbone.bottom_up.res4.17.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.17.conv2.*              | backbone.bottom_up.res4.17.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.17.conv3.*              | backbone.bottom_up.res4.17.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.18.conv1.*              | backbone.bottom_up.res4.18.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.18.conv2.*              | backbone.bottom_up.res4.18.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.18.conv3.*              | backbone.bottom_up.res4.18.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.19.conv1.*              | backbone.bottom_up.res4.19.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.19.conv2.*              | backbone.bottom_up.res4.19.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.19.conv3.*              | backbone.bottom_up.res4.19.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.2.conv1.*               | backbone.bottom_up.res4.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.2.conv2.*               | backbone.bottom_up.res4.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.2.conv3.*               | backbone.bottom_up.res4.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.20.conv1.*              | backbone.bottom_up.res4.20.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.20.conv2.*              | backbone.bottom_up.res4.20.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.20.conv3.*              | backbone.bottom_up.res4.20.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.21.conv1.*              | backbone.bottom_up.res4.21.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.21.conv2.*              | backbone.bottom_up.res4.21.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.21.conv3.*              | backbone.bottom_up.res4.21.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.22.conv1.*              | backbone.bottom_up.res4.22.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.22.conv2.*              | backbone.bottom_up.res4.22.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.22.conv3.*              | backbone.bottom_up.res4.22.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}   | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.3.conv1.*               | backbone.bottom_up.res4.3.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.3.conv2.*               | backbone.bottom_up.res4.3.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.3.conv3.*               | backbone.bottom_up.res4.3.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.4.conv1.*               | backbone.bottom_up.res4.4.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.4.conv2.*               | backbone.bottom_up.res4.4.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.4.conv3.*               | backbone.bottom_up.res4.4.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.5.conv1.*               | backbone.bottom_up.res4.5.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.5.conv2.*               | backbone.bottom_up.res4.5.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.5.conv3.*               | backbone.bottom_up.res4.5.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.6.conv1.*               | backbone.bottom_up.res4.6.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.6.conv2.*               | backbone.bottom_up.res4.6.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.6.conv3.*               | backbone.bottom_up.res4.6.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.7.conv1.*               | backbone.bottom_up.res4.7.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.7.conv2.*               | backbone.bottom_up.res4.7.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.7.conv3.*               | backbone.bottom_up.res4.7.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.8.conv1.*               | backbone.bottom_up.res4.8.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.8.conv2.*               | backbone.bottom_up.res4.8.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.8.conv3.*               | backbone.bottom_up.res4.8.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res4.9.conv1.*               | backbone.bottom_up.res4.9.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,1024,1,1)      |
| backbone.bottom_up.res4.9.conv2.*               | backbone.bottom_up.res4.9.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (256,) (256,) (256,) (256,) (256,256,3,3)       |
| backbone.bottom_up.res4.9.conv3.*               | backbone.bottom_up.res4.9.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (1024,) (1024,) (1024,) (1024,) (1024,256,1,1)  |
| backbone.bottom_up.res5.0.conv1.*               | backbone.bottom_up.res5.0.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,1024,1,1)      |
| backbone.bottom_up.res5.0.conv2.*               | backbone.bottom_up.res5.0.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,512,3,3)       |
| backbone.bottom_up.res5.0.conv3.*               | backbone.bottom_up.res5.0.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1)  |
| backbone.bottom_up.res5.0.shortcut.*            | backbone.bottom_up.res5.0.shortcut.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight} | (2048,) (2048,) (2048,) (2048,) (2048,1024,1,1) |
| backbone.bottom_up.res5.1.conv1.*               | backbone.bottom_up.res5.1.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,2048,1,1)      |
| backbone.bottom_up.res5.1.conv2.*               | backbone.bottom_up.res5.1.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,512,3,3)       |
| backbone.bottom_up.res5.1.conv3.*               | backbone.bottom_up.res5.1.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1)  |
| backbone.bottom_up.res5.2.conv1.*               | backbone.bottom_up.res5.2.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,2048,1,1)      |
| backbone.bottom_up.res5.2.conv2.*               | backbone.bottom_up.res5.2.conv2.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (512,) (512,) (512,) (512,) (512,512,3,3)       |
| backbone.bottom_up.res5.2.conv3.*               | backbone.bottom_up.res5.2.conv3.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}    | (2048,) (2048,) (2048,) (2048,) (2048,512,1,1)  |
| backbone.bottom_up.stem.conv1.*                 | backbone.bottom_up.stem.conv1.{norm.bias,norm.running_mean,norm.running_var,norm.weight,weight}      | (64,) (64,) (64,) (64,) (64,3,7,7)              |
| backbone.fpn_lateral2.*                         | backbone.fpn_lateral2.{bias,weight}                                                                  | (256,) (256,256,1,1)                            |
| backbone.fpn_lateral3.*                         | backbone.fpn_lateral3.{bias,weight}                                                                  | (256,) (256,512,1,1)                            |
| backbone.fpn_lateral4.*                         | backbone.fpn_lateral4.{bias,weight}                                                                  | (256,) (256,1024,1,1)                           |
| backbone.fpn_lateral5.*                         | backbone.fpn_lateral5.{bias,weight}                                                                  | (256,) (256,2048,1,1)                           |
| backbone.fpn_output2.*                          | backbone.fpn_output2.{bias,weight}                                                                   | (256,) (256,256,3,3)                            |
| backbone.fpn_output3.*                          | backbone.fpn_output3.{bias,weight}                                                                   | (256,) (256,256,3,3)                            |
| backbone.fpn_output4.*                          | backbone.fpn_output4.{bias,weight}                                                                   | (256,) (256,256,3,3)                            |
| backbone.fpn_output5.*                          | backbone.fpn_output5.{bias,weight}                                                                   | (256,) (256,256,3,3)                            |
| proposal_generator.rpn_head.anchor_deltas.*     | proposal_generator.rpn_head.anchor_deltas.{bias,weight}                                              | (12,) (12,256,1,1)                              |
| proposal_generator.rpn_head.conv.*              | proposal_generator.rpn_head.conv.{bias,weight}                                                       | (256,) (256,256,3,3)                            |
| proposal_generator.rpn_head.objectness_logits.* | proposal_generator.rpn_head.objectness_logits.{bias,weight}                                          | (3,) (3,256,1,1)                                |
| roi_heads.box_head.fc1.*                        | roi_heads.box_head.fc1.{bias,weight}                                                                 | (1024,) (1024,12544)                            |
| roi_heads.box_head.fc2.*                        | roi_heads.box_head.fc2.{bias,weight}                                                                 | (1024,) (1024,1024)                             |
| roi_heads.box_predictor.bbox_pred.*             | roi_heads.box_predictor.bbox_pred.{bias,weight}                                                      | (4,) (4,1024)                                   |
| roi_heads.box_predictor.cls_score.*             | roi_heads.box_predictor.cls_score.{bias,weight}                                                      | (2,) (2,1024)                                   |
| roi_heads.mask_head.deconv.*                    | roi_heads.mask_head.deconv.{bias,weight}                                                             | (256,) (256,256,2,2)                            |
| roi_heads.mask_head.mask_fcn1.*                 | roi_heads.mask_head.mask_fcn1.{bias,weight}                                                          | (256,) (256,256,3,3)                            |
| roi_heads.mask_head.mask_fcn2.*                 | roi_heads.mask_head.mask_fcn2.{bias,weight}                                                          | (256,) (256,256,3,3)                            |
| roi_heads.mask_head.mask_fcn3.*                 | roi_heads.mask_head.mask_fcn3.{bias,weight}                                                          | (256,) (256,256,3,3)                            |
| roi_heads.mask_head.mask_fcn4.*                 | roi_heads.mask_head.mask_fcn4.{bias,weight}                                                          | (256,) (256,256,3,3)                            |
| roi_heads.mask_head.predictor.*                 | roi_heads.mask_head.predictor.{bias,weight}                                                          | (1,) (1,256,1,1)                                |

Then, we randomly select several samples to visualize the prediction results.

In [31]:
def get_mask(im):
    outputs = predictor(im)
    max_index = outputs['instances'].scores.tolist().index(max(outputs['instances'].scores.tolist()))
    mask = outputs['instances'].pred_masks[max_index].tolist()
    mask = np.array(mask).reshape(im.shape[0],im.shape[1],1)*im
    x1 = outputs['instances'].pred_boxes.tensor.tolist()[max_index][0] 
    y1 = outputs['instances'].pred_boxes.tensor.tolist()[max_index][1]
    x2 = outputs['instances'].pred_boxes.tensor.tolist()[max_index][2]
    y2 = outputs['instances'].pred_boxes.tensor.tolist()[max_index][3]
    #imshow(mask[int(y1):int(y2),int(x1):int(x2),:])
    print(y2,y1,x2,x1,(y2-y1)*(x2-x1))
    return [int(y1),int(y2),int(x1),int(x2)]
In [32]:
im1 = get_mask(im)
/root/anaconda3/lib/python3.9/site-packages/torch/nn/functional.py:718: UserWarning: Named tensors and all their associated APIs are an experimental feature and subject to change. Please do not use them for anything important until they are released as stable. (Triggered internally at  /pytorch/c10/core/TensorImpl.h:1156.)
  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)
1468.1954345703125 1293.2349853515625 1364.9661865234375 1215.608642578125 26131.66298288107
In [33]:
%%time
#-- 5-10 seconds ## Loading to prediction to output

im = imread("/root/raghav/labelstudio/"+k)
outputs = predictor(im)  # format is documented at https://detectron2.readthedocs.io/tutorials/models.html#model-output-format
v = Visualizer(im[:, :, ::-1],
                scale=0.75, 
                   # remove the colors of unsegmented pixels. This option is only available for segmentation models
)
out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
CPU times: user 1.3 s, sys: 130 ms, total: 1.43 s
Wall time: 1.3 s
In [34]:
box_dict = {}
for i in dataset_dicts['images']:
    k = i['file_name']
    im = cv2.imread("/root/raghav/labelstudio/"+k)
    try:
        a = get_mask(im)
        box_dict[k] = a
        print(k,a)
    except:
        pass 
2213.11376953125 1993.5338134765625 830.9368896484375 492.2952880859375 74358.90798938274
images/11/bdcfba0e-14.jpg [1993, 2213, 492, 830]
1363.915771484375 1173.44189453125 895.8938598632812 744.4111328125 28853.502312794328
images/11/82c7c2ca-11.jpg [1173, 1363, 744, 895]
2331.93896484375 1381.3143310546875 2074.70458984375 1302.9000244140625 733696.4323683232
images/11/7f355d57-2.JPG [1381, 2331, 1302, 2074]
2240.393310546875 2084.628662109375 1066.9970703125 852.8511962890625 33356.356781601906
images/11/03865ad2-25.jpg [2084, 2240, 852, 1066]
2606.329345703125 2562.671875 1242.3629150390625 1044.86328125 8622.334476023912
images/11/a525426b-24.jpg [2562, 2606, 1044, 1242]
1468.1954345703125 1293.2349853515625 1364.9661865234375 1215.608642578125 26131.66298288107
images/11/a29d1440-23.jpg [1293, 1468, 1215, 1364]
219.8627166748047 178.30796813964844 95.97936248779297 44.45733642578125 2140.9848370286636
images/11/f66ef193-22.jpg [178, 219, 44, 95]
2941.13623046875 2716.9697265625 986.3261108398438 761.4359130859375 50412.84939327836
images/11/30b24faf-21.jpg [2716, 2941, 761, 986]
677.9794311523438 659.8280639648438 513.8724365234375 437.2713317871094 1390.4147790372372
images/11/3576ba5b-20.jpg [659, 677, 437, 513]
2639.767822265625 2587.56787109375 1023.1159057617188 715.7958374023438 16042.092562496662
images/11/39fa94e9-19.jpg [2587, 2639, 715, 1023]
2019.7305908203125 1840.5123291015625 1615.9908447265625 1070.1298828125 97828.25273436308
images/11/659fbde8-18.jpg [1840, 2019, 1070, 1615]
Invalid SOS parameters for sequential JPEG
700.133056640625 466.6375427246094 357.508544921875 192.16351318359375 38607.323159189895
images/11/8d9688df-17.jpg [466, 700, 192, 357]
874.297607421875 789.6838989257812 379.7408142089844 239.175537109375 11893.74938117899
images/11/96c90a35-16.jpg [789, 874, 239, 379]
1686.704833984375 1571.3988037109375 1483.6629638671875 1255.39990234375 26320.10748232901
images/11/69db667b-15.JPG [1571, 1686, 1255, 1483]
1818.5491943359375 1453.2490234375 1711.244384765625 634.609619140625 393294.8638780117
images/11/dec01bf9-13.jpg [1453, 1818, 634, 1711]
651.0650634765625 590.3082885742188 497.3738708496094 304.7451477050781 11703.499971818179
images/11/52df14cb-12.jpg [590, 651, 304, 497]
838.8355712890625 776.0136108398438 432.61199951171875 317.3257141113281 7242.510461760685
images/11/e73b6357-10.jpg [776, 838, 317, 432]
459.82696533203125 301.2104187011719 429.2563781738281 168.19210815429688 41409.11295920424
images/11/765ef5c1-9.jpg [301, 459, 168, 429]
2362.1552734375 2119.595703125 1963.114501953125 1738.6917724609375 54435.88083398342
images/11/a7bd461c-8.JPG [2119, 2362, 1738, 1963]
2068.41015625 1846.001708984375 1182.108154296875 987.1857299804688 43352.39372946322
images/11/de03b894-7.jpg [1846, 2068, 987, 1182]
2530.751220703125 2101.198486328125 2005.325927734375 748.83349609375 539729.7597327232
images/11/fbbba93f-6.JPG [2101, 2530, 748, 2005]
5233.74072265625 5162.193359375 3329.501708984375 2801.6298828125 37767.83731305599
images/11/23c6bf74-5.jpg [5162, 5233, 2801, 3329]
1832.986328125 1202.7701416015625 2418.65869140625 722.2001953125 1069135.604003489
images/11/0571f85d-4.jpg [1202, 1832, 722, 2418]
2204.8984375 2120.737548828125 1556.328125 1272.4105224609375 23894.757739275694
images/11/8cde8e52-3.jpg [2120, 2204, 1272, 1556]
1571.1124267578125 1395.929443359375 1528.3304443359375 835.5157470703125 121369.34560927749
images/11/16bee31b-1.jpg [1395, 1571, 835, 1528]
2421.9970703125 2291.085205078125 1170.643310546875 914.7128295898438 33504.33663241565
images/11/dd70db44-IMG_0343.JPG [2291, 2421, 914, 1170]
2291.00048828125 2222.3349609375 1423.87548828125 1208.0472412109375 14819.960400760174
images/11/ce39d6ee-IMG_0335.JPG [2222, 2291, 1208, 1423]
2461.2373046875 2374.244140625 1362.6220703125 1156.7894287109375 17906.03276026249
images/11/acef4032-IMG_0342.JPG [2374, 2461, 1156, 1362]
2420.017333984375 2333.370361328125 1094.765625 955.7530517578125 12045.01863259077
images/11/3a9d0723-IMG_0346.JPG [2333, 2420, 955, 1094]
2264.532470703125 2109.755859375 1176.8778076171875 955.6858520507812 34235.341335609555
images/11/060aaccc-IMG_0345.JPG [2109, 2264, 955, 1176]
2844.13427734375 2744.190673828125 1549.469970703125 1308.5311279296875 24080.296173661947
images/11/e0400583-IMG_0347.JPG [2744, 2844, 1308, 1549]
2548.83203125 2372.49365234375 1235.7537841796875 1201.0631103515625 6117.297186017036
images/11/5d1b016a-IMG_0336.JPG [2372, 2548, 1201, 1235]
2362.9150390625 2179.443115234375 1196.54736328125 808.3644409179688 71220.6675632149
images/11/abfcc678-IMG_0339.JPG [2179, 2362, 808, 1196]
2897.173583984375 2782.49951171875 1304.4310302734375 1115.6552734375 21647.68478140235
images/11/9152cc57-IMG_0349.JPG [2782, 2897, 1115, 1304]
2178.522216796875 2076.779541015625 1245.202392578125 1092.352294921875 15551.377928972244
images/11/5fae8912-IMG_0338.JPG [2076, 2178, 1092, 1245]
2412.2109375 2342.7880859375 1161.607421875 936.3329467773438 15639.196445524693
images/11/4946c5fb-IMG_0331.JPG [2342, 2412, 936, 1161]
2663.8896484375 2582.580322265625 1501.8231201171875 1337.578125 13354.649880081415
images/11/cd6ea9ce-IMG_0355.JPG [2582, 2663, 1337, 1501]
2500.86376953125 2378.303466796875 1399.1336669921875 1174.580078125 27521.35583165288
images/11/7b2945ef-IMG_0340.JPG [2378, 2500, 1174, 1399]
2623.43359375 2518.29931640625 1416.4542236328125 1140.50537109375 29011.68319553137
images/11/8b4b117d-IMG_0344.JPG [2518, 2623, 1140, 1416]
3071.157958984375 2982.8095703125 1514.810791015625 1320.9766845703125 17124.93097409606
images/11/a2f9206c-IMG_0353.JPG [2982, 3071, 1320, 1514]
2640.319091796875 2424.7216796875 1115.467529296875 1050.1514892578125 14081.969201654196
images/11/e4f2229d-IMG_0337.JPG [2424, 2640, 1050, 1115]
2666.634033203125 2564.0966796875 1476.0267333984375 1315.259765625 16484.619408220053
images/11/1af4a433-IMG_0354.JPG [2564, 2666, 1315, 1476]
2295.868896484375 2175.04541015625 1271.450439453125 1053.2735595703125 26360.891263633966
images/11/8f5d6a91-IMG_0332.JPG [2175, 2295, 1053, 1271]
2162.60546875 1843.21630859375 1079.6893310546875 856.0884399414062 71415.70082286
images/11/b6e1149a-IMG_0329.JPG [1843, 2162, 856, 1079]
2673.288818359375 2581.06787109375 1232.3536376953125 1109.6611328125 11314.81902268529
images/11/6d9d2d6f-IMG_0359.JPG [2581, 2673, 1109, 1232]
2560.627685546875 2424.855712890625 1428.484130859375 1202.532958984375 30677.836329460144
images/11/8ce904be-IMG_0348.JPG [2424, 2560, 1202, 1428]
2360.52734375 2250.020751953125 823.954345703125 709.2855224609375 12671.660841852427
images/11/438005ea-IMG_0326.JPG [2250, 2360, 709, 823]
2639.96337890625 2552.760986328125 1185.3531494140625 1029.632080078125 13579.249820917845
images/11/ed89dda7-IMG_0321.JPG [2552, 2639, 1029, 1185]
2366.402099609375 2242.737548828125 1260.3187255859375 1159.99072265625 12407.017413079739
images/11/8b92b444-IMG_0322.JPG [2242, 2366, 1159, 1260]
2312.3779296875 2202.75732421875 1507.800537109375 1363.369873046875 15832.576842784882
images/11/f2b94273-IMG_0350.JPG [2202, 2312, 1363, 1507]
2284.098876953125 2183.12939453125 1217.2496337890625 1069.9322509765625 14874.559894323349
images/11/a99d9f5b-IMG_0323.JPG [2183, 2284, 1069, 1217]
2510.920166015625 2466.939208984375 1255.4263916015625 1028.3677978515625 9986.2542552948
images/11/2f68429a-IMG_0333.JPG [2466, 2510, 1028, 1255]
2524.831787109375 2452.073486328125 1243.888671875 999.2626342773438 17798.574822455645
images/11/62c4d761-IMG_0334.JPG [2452, 2524, 999, 1243]
2609.711669921875 2428.610107421875 1509.612548828125 1422.8714599609375 15708.946726799011
images/11/9a34f8a1-IMG_0320.JPG [2428, 2609, 1422, 1509]
2455.18310546875 2239.640869140625 1230.784912109375 1044.853271484375 40076.12162446976
images/11/838b8505-IMG_0330.JPG [2239, 2455, 1044, 1230]
2781.961669921875 2687.9482421875 1193.9986572265625 1006.627197265625 17615.433210521936
images/11/f2724ae4-IMG_0319.JPG [2687, 2781, 1006, 1193]
2901.995361328125 2736.460205078125 1052.5987548828125 909.5809326171875 23674.477555274963
images/11/e1a5c9ad-IMG_0312.JPG [2736, 2901, 909, 1052]
2775.4765625 2449.23095703125 1285.741455078125 1147.0177001953125 45258.01540464163
images/11/9259c363-IMG_0311.JPG [2449, 2775, 1147, 1285]
2043.6812744140625 1712.791748046875 1058.3525390625 858.6241455078125 66088.03354538977
images/11/7e632d99-IMG_0306.JPG [1712, 2043, 858, 1058]
2407.583251953125 2071.169677734375 1439.5577392578125 1090.9072265625 117290.76512902975
images/11/cfb5b88a-IMG_0301.JPG [2071, 2407, 1090, 1439]
2286.68017578125 2024.009033203125 1225.962646484375 1172.255126953125 14107.415520310402
images/11/60708a60-IMG_0316.JPG [2024, 2286, 1172, 1225]
2693.58154296875 2096.812255859375 1313.10888671875 1065.4344482421875 147804.49808487296
images/11/e3423fba-IMG_0308.JPG [2096, 2693, 1065, 1313]
2766.928955078125 2504.139892578125 1445.3353271484375 1165.5391845703125 73527.3659992218
images/11/9505cd1e-IMG_0299.JPG [2504, 2766, 1165, 1445]
2517.968017578125 2444.506591796875 1361.67626953125 1158.127685546875 14952.969195246696
images/11/7fc61b3a-IMG_0318.JPG [2444, 2517, 1158, 1361]
2673.554931640625 2386.7216796875 1210.613037109375 1152.922119140625 16547.673609137535
images/11/859c9222-IMG_0313.JPG [2386, 2673, 1152, 1210]
2849.074951171875 2550.048583984375 1401.039794921875 1093.5897216796875 91935.67849314213
images/11/4bd013d1-IMG_0303.JPG [2550, 2849, 1093, 1401]
2191.798095703125 2101.673828125 1240.1917724609375 1063.435302734375 15930.04737380147
images/11/a1a4c73b-IMG_0295.JPG [2101, 2191, 1063, 1240]
2809.42724609375 2504.707763671875 1345.116455078125 1024.350341796875 97743.68401753902
images/11/74f14799-IMG_0300.JPG [2504, 2809, 1024, 1345]
2627.28173828125 2445.81884765625 1179.113525390625 876.8929443359375 54841.820244550705
images/11/bf411d4c-IMG_0294.JPG [2445, 2627, 876, 1179]
2829.277099609375 2642.719482421875 1390.011474609375 1118.26611328125 50696.16709113121
images/11/a3bb42c6-IMG_0302.JPG [2642, 2829, 1118, 1390]
2691.045166015625 2624.855712890625 1338.711669921875 1152.5341796875 12322.98626279831
images/11/6cf19201-IMG_0309.JPG [2624, 2691, 1152, 1338]
2309.70166015625 2118.62158203125 867.4765625 660.2514038085938 39596.59951221943
images/11/5b1dbf69-IMG_E0297.JPG [2118, 2309, 660, 867]
2851.448974609375 2604.238525390625 1466.26806640625 1299.998779296875 41103.50515758991
images/11/83fd0d53-IMG_0304.JPG [2604, 2851, 1299, 1466]
2720.51611328125 2446.739013671875 1543.4952392578125 1267.1517333984375 75656.52353006601
images/11/9f7b7a1f-IMG_0298.JPG [2446, 2720, 1267, 1543]
In [35]:
list(box_dict.items())[0:6]
Out[35]:
[('images/11/bdcfba0e-14.jpg', [1993, 2213, 492, 830]),
 ('images/11/82c7c2ca-11.jpg', [1173, 1363, 744, 895]),
 ('images/11/7f355d57-2.JPG', [1381, 2331, 1302, 2074]),
 ('images/11/03865ad2-25.jpg', [2084, 2240, 852, 1066]),
 ('images/11/a525426b-24.jpg', [2562, 2606, 1044, 1242]),
 ('images/11/a29d1440-23.jpg', [1293, 1468, 1215, 1364])]
In [37]:
for a,b in list(box_dict.items())[0:6]:
    im = cv2.imread("/root/raghav/labelstudio/"+a)
    outputs = predictor(im) 
    v = Visualizer(im[:, :, ::-1],
                scale=1, 
                #instance_mode=ColorMode.IMAGE_BW   # remove the colors of unsegmented pixels. This option is only available for segmentation models
)

    out = v.draw_instance_predictions(outputs["instances"].to("cpu"))
    actual = im[b[0]:b[1],b[2]:b[3],:]
    #actual = imshow(a)
    #pred = get_mask(b)
    pred = out.get_image()[b[0]:b[1],b[2]:b[3],:]
    #pred = out.get_image(b)
    fig, ax = plt.subplots(1,4, figsize = (32,24))

    ax[0].imshow(actual[:,:,::-1])
    ax[2].imshow(pred[:,:,::-1])
    ax[1].imshow(im[:,:,::-1])
    ax[3].imshow(out.get_image())
    print(outputs['instances'])
    
   
    ax[0].set_title(f"original imag")
    ax[2].set_title(f"masked Image - contours")
    ax[1].set_title('actual image')
    ax[3].set_title('image pred')
    
    
    fig.show()
Instances(num_instances=5, image_height=4032, image_width=1908, fields=[pred_boxes: Boxes(tensor([[ 492.2953, 1993.5338,  830.9369, 2213.1138],
        [ 725.5929, 1968.1691,  854.2525, 2078.5449],
        [ 476.6273, 2176.9695,  594.9737, 2228.3889],
        [ 652.2195, 1976.4672,  850.7067, 2128.5671],
        [ 474.9111, 2165.6458,  605.6243, 2207.3381]], device='cuda:0')), scores: tensor([0.9918, 0.9413, 0.9237, 0.6282, 0.5230], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=6, image_height=2730, image_width=1535, fields=[pred_boxes: Boxes(tensor([[ 744.4111, 1173.4419,  895.8939, 1363.9158],
        [ 433.5776, 1218.7476,  891.2090, 1562.3873],
        [ 659.9612, 1296.2074,  792.5440, 1461.4200],
        [ 434.7644, 1445.7448,  739.1277, 1537.2412],
        [ 663.5048, 1208.4712,  871.9982, 1447.9796],
        [ 417.7054, 1490.4789,  641.8448, 1538.8746]], device='cuda:0')), scores: tensor([0.9799, 0.9729, 0.7588, 0.6976, 0.6229, 0.5114], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=2, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1302.9000, 1381.3143, 2074.7046, 2331.9390],
        [1632.3135, 1363.0688, 2009.4152, 1898.0432]], device='cuda:0')), scores: tensor([0.9825, 0.9427], device='cuda:0'), pred_classes: tensor([0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=3, image_height=4032, image_width=1960, fields=[pred_boxes: Boxes(tensor([[ 852.8512, 2084.6287, 1066.9971, 2240.3933],
        [1000.0120, 2069.0386, 1080.1000, 2121.9392],
        [ 839.3368, 2158.1443,  967.0345, 2258.1899]], device='cuda:0')), scores: tensor([0.9907, 0.9373, 0.9294], device='cuda:0'), pred_classes: tensor([0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=1, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1044.8633, 2562.6719, 1242.3629, 2606.3293]], device='cuda:0')), scores: tensor([0.5531], device='cuda:0'), pred_classes: tensor([0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
Instances(num_instances=5, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1215.6086, 1293.2350, 1364.9662, 1468.1954],
        [1054.0917, 1463.2124, 1256.2151, 1563.3062],
        [1053.2366, 1317.3365, 1361.6282, 1544.8196],
        [1268.5144, 1283.4891, 1364.2344, 1413.8016],
        [1045.9758, 1496.9252, 1218.1167, 1571.3212]], device='cuda:0')), scores: tensor([0.9762, 0.9700, 0.8129, 0.6798, 0.6142], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
In [38]:
outputs['instances']
Out[38]:
Instances(num_instances=5, image_height=4032, image_width=3024, fields=[pred_boxes: Boxes(tensor([[1215.6086, 1293.2350, 1364.9662, 1468.1954],
        [1054.0917, 1463.2124, 1256.2151, 1563.3062],
        [1053.2366, 1317.3365, 1361.6282, 1544.8196],
        [1268.5144, 1283.4891, 1364.2344, 1413.8016],
        [1045.9758, 1496.9252, 1218.1167, 1571.3212]], device='cuda:0')), scores: tensor([0.9762, 0.9700, 0.8129, 0.6798, 0.6142], device='cuda:0'), pred_classes: tensor([0, 0, 0, 0, 0], device='cuda:0'), pred_masks: tensor([[[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]],

        [[False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         ...,
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False],
         [False, False, False,  ..., False, False, False]]], device='cuda:0')])
In [ ]:
 

¶